{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "c385d38e-cfd8-400c-b7da-55f0d203343a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 测试simpleRNN、LSTM、GRU三种RNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "1ef325b4-1b45-473c-a42b-bcb35eb6958a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# import tensorflow as tf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "059688db-b0e0-44de-8fe3-314cbb387f9c",
   "metadata": {},
   "outputs": [],
   "source": [
    "from tensorflow.keras.datasets import imdb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "1ec9f9cd-1689-4369-b178-200901c34835",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\conda\\envs\\course_video_cls\\lib\\site-packages\\tensorflow\\python\\keras\\datasets\\imdb.py:155: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
      "  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])\n",
      "C:\\conda\\envs\\course_video_cls\\lib\\site-packages\\tensorflow\\python\\keras\\datasets\\imdb.py:156: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
      "  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])\n"
     ]
    }
   ],
   "source": [
    "# 加载数据\n",
    "\n",
    "# 常用的10000个单词，其他的是OOV（超出词汇）\n",
    "vocabulary = 10000\n",
    "\n",
    "start_char = 1\n",
    "oov_char = 2\n",
    "index_from = 3\n",
    "\n",
    "(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocabulary,start_char=start_char, oov_char=oov_char, index_from=index_from)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "65bc68e3-3837-48b6-85e0-809de8b8b172",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "218"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 打印第一个句子的长度\n",
    "len(x_train[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "b8310797-594d-4a76-9fa5-fcee575bee2b",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[1,\n",
       " 14,\n",
       " 22,\n",
       " 16,\n",
       " 43,\n",
       " 530,\n",
       " 973,\n",
       " 1622,\n",
       " 1385,\n",
       " 65,\n",
       " 458,\n",
       " 4468,\n",
       " 66,\n",
       " 3941,\n",
       " 4,\n",
       " 173,\n",
       " 36,\n",
       " 256,\n",
       " 5,\n",
       " 25,\n",
       " 100,\n",
       " 43,\n",
       " 838,\n",
       " 112,\n",
       " 50,\n",
       " 670,\n",
       " 2,\n",
       " 9,\n",
       " 35,\n",
       " 480,\n",
       " 284,\n",
       " 5,\n",
       " 150,\n",
       " 4,\n",
       " 172,\n",
       " 112,\n",
       " 167,\n",
       " 2,\n",
       " 336,\n",
       " 385,\n",
       " 39,\n",
       " 4,\n",
       " 172,\n",
       " 4536,\n",
       " 1111,\n",
       " 17,\n",
       " 546,\n",
       " 38,\n",
       " 13,\n",
       " 447,\n",
       " 4,\n",
       " 192,\n",
       " 50,\n",
       " 16,\n",
       " 6,\n",
       " 147,\n",
       " 2025,\n",
       " 19,\n",
       " 14,\n",
       " 22,\n",
       " 4,\n",
       " 1920,\n",
       " 4613,\n",
       " 469,\n",
       " 4,\n",
       " 22,\n",
       " 71,\n",
       " 87,\n",
       " 12,\n",
       " 16,\n",
       " 43,\n",
       " 530,\n",
       " 38,\n",
       " 76,\n",
       " 15,\n",
       " 13,\n",
       " 1247,\n",
       " 4,\n",
       " 22,\n",
       " 17,\n",
       " 515,\n",
       " 17,\n",
       " 12,\n",
       " 16,\n",
       " 626,\n",
       " 18,\n",
       " 2,\n",
       " 5,\n",
       " 62,\n",
       " 386,\n",
       " 12,\n",
       " 8,\n",
       " 316,\n",
       " 8,\n",
       " 106,\n",
       " 5,\n",
       " 4,\n",
       " 2223,\n",
       " 5244,\n",
       " 16,\n",
       " 480,\n",
       " 66,\n",
       " 3785,\n",
       " 33,\n",
       " 4,\n",
       " 130,\n",
       " 12,\n",
       " 16,\n",
       " 38,\n",
       " 619,\n",
       " 5,\n",
       " 25,\n",
       " 124,\n",
       " 51,\n",
       " 36,\n",
       " 135,\n",
       " 48,\n",
       " 25,\n",
       " 1415,\n",
       " 33,\n",
       " 6,\n",
       " 22,\n",
       " 12,\n",
       " 215,\n",
       " 28,\n",
       " 77,\n",
       " 52,\n",
       " 5,\n",
       " 14,\n",
       " 407,\n",
       " 16,\n",
       " 82,\n",
       " 2,\n",
       " 8,\n",
       " 4,\n",
       " 107,\n",
       " 117,\n",
       " 5952,\n",
       " 15,\n",
       " 256,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 3766,\n",
       " 5,\n",
       " 723,\n",
       " 36,\n",
       " 71,\n",
       " 43,\n",
       " 530,\n",
       " 476,\n",
       " 26,\n",
       " 400,\n",
       " 317,\n",
       " 46,\n",
       " 7,\n",
       " 4,\n",
       " 2,\n",
       " 1029,\n",
       " 13,\n",
       " 104,\n",
       " 88,\n",
       " 4,\n",
       " 381,\n",
       " 15,\n",
       " 297,\n",
       " 98,\n",
       " 32,\n",
       " 2071,\n",
       " 56,\n",
       " 26,\n",
       " 141,\n",
       " 6,\n",
       " 194,\n",
       " 7486,\n",
       " 18,\n",
       " 4,\n",
       " 226,\n",
       " 22,\n",
       " 21,\n",
       " 134,\n",
       " 476,\n",
       " 26,\n",
       " 480,\n",
       " 5,\n",
       " 144,\n",
       " 30,\n",
       " 5535,\n",
       " 18,\n",
       " 51,\n",
       " 36,\n",
       " 28,\n",
       " 224,\n",
       " 92,\n",
       " 25,\n",
       " 104,\n",
       " 4,\n",
       " 226,\n",
       " 65,\n",
       " 16,\n",
       " 38,\n",
       " 1334,\n",
       " 88,\n",
       " 12,\n",
       " 16,\n",
       " 283,\n",
       " 5,\n",
       " 16,\n",
       " 4472,\n",
       " 113,\n",
       " 103,\n",
       " 32,\n",
       " 15,\n",
       " 16,\n",
       " 5345,\n",
       " 19,\n",
       " 178,\n",
       " 32]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 打印第一个句子\n",
    "x_train[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "37d1aa6c-f5c6-4842-aa97-01a699913a8f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 索引和标签词典"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "411f607c-a9db-4404-ab3b-6c27f791f001",
   "metadata": {},
   "outputs": [],
   "source": [
    "word_index = imdb.get_word_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "541d6b21-5fd9-423b-8017-3ae4d800ead0",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'fawn': 34701,\n",
       " 'tsukino': 52006,\n",
       " 'nunnery': 52007,\n",
       " 'sonja': 16816,\n",
       " 'vani': 63951,\n",
       " 'woods': 1408,\n",
       " 'spiders': 16115,\n",
       " 'hanging': 2345,\n",
       " 'woody': 2289,\n",
       " 'trawling': 52008,\n",
       " \"hold's\": 52009,\n",
       " 'comically': 11307,\n",
       " 'localized': 40830,\n",
       " 'disobeying': 30568,\n",
       " \"'royale\": 52010,\n",
       " \"harpo's\": 40831,\n",
       " 'canet': 52011,\n",
       " 'aileen': 19313,\n",
       " 'acurately': 52012,\n",
       " \"diplomat's\": 52013,\n",
       " 'rickman': 25242,\n",
       " 'arranged': 6746,\n",
       " 'rumbustious': 52014,\n",
       " 'familiarness': 52015,\n",
       " \"spider'\": 52016,\n",
       " 'hahahah': 68804,\n",
       " \"wood'\": 52017,\n",
       " 'transvestism': 40833,\n",
       " \"hangin'\": 34702,\n",
       " 'bringing': 2338,\n",
       " 'seamier': 40834,\n",
       " 'wooded': 34703,\n",
       " 'bravora': 52018,\n",
       " 'grueling': 16817,\n",
       " 'wooden': 1636,\n",
       " 'wednesday': 16818,\n",
       " \"'prix\": 52019,\n",
       " 'altagracia': 34704,\n",
       " 'circuitry': 52020,\n",
       " 'crotch': 11585,\n",
       " 'busybody': 57766,\n",
       " \"tart'n'tangy\": 52021,\n",
       " 'burgade': 14129,\n",
       " 'thrace': 52023,\n",
       " \"tom's\": 11038,\n",
       " 'snuggles': 52025,\n",
       " 'francesco': 29114,\n",
       " 'complainers': 52027,\n",
       " 'templarios': 52125,\n",
       " '272': 40835,\n",
       " '273': 52028,\n",
       " 'zaniacs': 52130,\n",
       " '275': 34706,\n",
       " 'consenting': 27631,\n",
       " 'snuggled': 40836,\n",
       " 'inanimate': 15492,\n",
       " 'uality': 52030,\n",
       " 'bronte': 11926,\n",
       " 'errors': 4010,\n",
       " 'dialogs': 3230,\n",
       " \"yomada's\": 52031,\n",
       " \"madman's\": 34707,\n",
       " 'dialoge': 30585,\n",
       " 'usenet': 52033,\n",
       " 'videodrome': 40837,\n",
       " \"kid'\": 26338,\n",
       " 'pawed': 52034,\n",
       " \"'girlfriend'\": 30569,\n",
       " \"'pleasure\": 52035,\n",
       " \"'reloaded'\": 52036,\n",
       " \"kazakos'\": 40839,\n",
       " 'rocque': 52037,\n",
       " 'mailings': 52038,\n",
       " 'brainwashed': 11927,\n",
       " 'mcanally': 16819,\n",
       " \"tom''\": 52039,\n",
       " 'kurupt': 25243,\n",
       " 'affiliated': 21905,\n",
       " 'babaganoosh': 52040,\n",
       " \"noe's\": 40840,\n",
       " 'quart': 40841,\n",
       " 'kids': 359,\n",
       " 'uplifting': 5034,\n",
       " 'controversy': 7093,\n",
       " 'kida': 21906,\n",
       " 'kidd': 23379,\n",
       " \"error'\": 52041,\n",
       " 'neurologist': 52042,\n",
       " 'spotty': 18510,\n",
       " 'cobblers': 30570,\n",
       " 'projection': 9878,\n",
       " 'fastforwarding': 40842,\n",
       " 'sters': 52043,\n",
       " \"eggar's\": 52044,\n",
       " 'etherything': 52045,\n",
       " 'gateshead': 40843,\n",
       " 'airball': 34708,\n",
       " 'unsinkable': 25244,\n",
       " 'stern': 7180,\n",
       " \"cervi's\": 52046,\n",
       " 'dnd': 40844,\n",
       " 'dna': 11586,\n",
       " 'insecurity': 20598,\n",
       " \"'reboot'\": 52047,\n",
       " 'trelkovsky': 11037,\n",
       " 'jaekel': 52048,\n",
       " 'sidebars': 52049,\n",
       " \"sforza's\": 52050,\n",
       " 'distortions': 17633,\n",
       " 'mutinies': 52051,\n",
       " 'sermons': 30602,\n",
       " '7ft': 40846,\n",
       " 'boobage': 52052,\n",
       " \"o'bannon's\": 52053,\n",
       " 'populations': 23380,\n",
       " 'chulak': 52054,\n",
       " 'mesmerize': 27633,\n",
       " 'quinnell': 52055,\n",
       " 'yahoo': 10307,\n",
       " 'meteorologist': 52057,\n",
       " 'beswick': 42577,\n",
       " 'boorman': 15493,\n",
       " 'voicework': 40847,\n",
       " \"ster'\": 52058,\n",
       " 'blustering': 22922,\n",
       " 'hj': 52059,\n",
       " 'intake': 27634,\n",
       " 'morally': 5621,\n",
       " 'jumbling': 40849,\n",
       " 'bowersock': 52060,\n",
       " \"'porky's'\": 52061,\n",
       " 'gershon': 16821,\n",
       " 'ludicrosity': 40850,\n",
       " 'coprophilia': 52062,\n",
       " 'expressively': 40851,\n",
       " \"india's\": 19500,\n",
       " \"post's\": 34710,\n",
       " 'wana': 52063,\n",
       " 'wang': 5283,\n",
       " 'wand': 30571,\n",
       " 'wane': 25245,\n",
       " 'edgeways': 52321,\n",
       " 'titanium': 34711,\n",
       " 'pinta': 40852,\n",
       " 'want': 178,\n",
       " 'pinto': 30572,\n",
       " 'whoopdedoodles': 52065,\n",
       " 'tchaikovsky': 21908,\n",
       " 'travel': 2103,\n",
       " \"'victory'\": 52066,\n",
       " 'copious': 11928,\n",
       " 'gouge': 22433,\n",
       " \"chapters'\": 52067,\n",
       " 'barbra': 6702,\n",
       " 'uselessness': 30573,\n",
       " \"wan'\": 52068,\n",
       " 'assimilated': 27635,\n",
       " 'petiot': 16116,\n",
       " 'most\\x85and': 52069,\n",
       " 'dinosaurs': 3930,\n",
       " 'wrong': 352,\n",
       " 'seda': 52070,\n",
       " 'stollen': 52071,\n",
       " 'sentencing': 34712,\n",
       " 'ouroboros': 40853,\n",
       " 'assimilates': 40854,\n",
       " 'colorfully': 40855,\n",
       " 'glenne': 27636,\n",
       " 'dongen': 52072,\n",
       " 'subplots': 4760,\n",
       " 'kiloton': 52073,\n",
       " 'chandon': 23381,\n",
       " \"effect'\": 34713,\n",
       " 'snugly': 27637,\n",
       " 'kuei': 40856,\n",
       " 'welcomed': 9092,\n",
       " 'dishonor': 30071,\n",
       " 'concurrence': 52075,\n",
       " 'stoicism': 23382,\n",
       " \"guys'\": 14896,\n",
       " \"beroemd'\": 52077,\n",
       " 'butcher': 6703,\n",
       " \"melfi's\": 40857,\n",
       " 'aargh': 30623,\n",
       " 'playhouse': 20599,\n",
       " 'wickedly': 11308,\n",
       " 'fit': 1180,\n",
       " 'labratory': 52078,\n",
       " 'lifeline': 40859,\n",
       " 'screaming': 1927,\n",
       " 'fix': 4287,\n",
       " 'cineliterate': 52079,\n",
       " 'fic': 52080,\n",
       " 'fia': 52081,\n",
       " 'fig': 34714,\n",
       " 'fmvs': 52082,\n",
       " 'fie': 52083,\n",
       " 'reentered': 52084,\n",
       " 'fin': 30574,\n",
       " 'doctresses': 52085,\n",
       " 'fil': 52086,\n",
       " 'zucker': 12606,\n",
       " 'ached': 31931,\n",
       " 'counsil': 52088,\n",
       " 'paterfamilias': 52089,\n",
       " 'songwriter': 13885,\n",
       " 'shivam': 34715,\n",
       " 'hurting': 9654,\n",
       " 'effects': 299,\n",
       " 'slauther': 52090,\n",
       " \"'flame'\": 52091,\n",
       " 'sommerset': 52092,\n",
       " 'interwhined': 52093,\n",
       " 'whacking': 27638,\n",
       " 'bartok': 52094,\n",
       " 'barton': 8775,\n",
       " 'frewer': 21909,\n",
       " \"fi'\": 52095,\n",
       " 'ingrid': 6192,\n",
       " 'stribor': 30575,\n",
       " 'approporiately': 52096,\n",
       " 'wobblyhand': 52097,\n",
       " 'tantalisingly': 52098,\n",
       " 'ankylosaurus': 52099,\n",
       " 'parasites': 17634,\n",
       " 'childen': 52100,\n",
       " \"jenkins'\": 52101,\n",
       " 'metafiction': 52102,\n",
       " 'golem': 17635,\n",
       " 'indiscretion': 40860,\n",
       " \"reeves'\": 23383,\n",
       " \"inamorata's\": 57781,\n",
       " 'brittannica': 52104,\n",
       " 'adapt': 7916,\n",
       " \"russo's\": 30576,\n",
       " 'guitarists': 48246,\n",
       " 'abbott': 10553,\n",
       " 'abbots': 40861,\n",
       " 'lanisha': 17649,\n",
       " 'magickal': 40863,\n",
       " 'mattter': 52105,\n",
       " \"'willy\": 52106,\n",
       " 'pumpkins': 34716,\n",
       " 'stuntpeople': 52107,\n",
       " 'estimate': 30577,\n",
       " 'ugghhh': 40864,\n",
       " 'gameplay': 11309,\n",
       " \"wern't\": 52108,\n",
       " \"n'sync\": 40865,\n",
       " 'sickeningly': 16117,\n",
       " 'chiara': 40866,\n",
       " 'disturbed': 4011,\n",
       " 'portmanteau': 40867,\n",
       " 'ineffectively': 52109,\n",
       " \"duchonvey's\": 82143,\n",
       " \"nasty'\": 37519,\n",
       " 'purpose': 1285,\n",
       " 'lazers': 52112,\n",
       " 'lightened': 28105,\n",
       " 'kaliganj': 52113,\n",
       " 'popularism': 52114,\n",
       " \"damme's\": 18511,\n",
       " 'stylistics': 30578,\n",
       " 'mindgaming': 52115,\n",
       " 'spoilerish': 46449,\n",
       " \"'corny'\": 52117,\n",
       " 'boerner': 34718,\n",
       " 'olds': 6792,\n",
       " 'bakelite': 52118,\n",
       " 'renovated': 27639,\n",
       " 'forrester': 27640,\n",
       " \"lumiere's\": 52119,\n",
       " 'gaskets': 52024,\n",
       " 'needed': 884,\n",
       " 'smight': 34719,\n",
       " 'master': 1297,\n",
       " \"edie's\": 25905,\n",
       " 'seeber': 40868,\n",
       " 'hiya': 52120,\n",
       " 'fuzziness': 52121,\n",
       " 'genesis': 14897,\n",
       " 'rewards': 12607,\n",
       " 'enthrall': 30579,\n",
       " \"'about\": 40869,\n",
       " \"recollection's\": 52122,\n",
       " 'mutilated': 11039,\n",
       " 'fatherlands': 52123,\n",
       " \"fischer's\": 52124,\n",
       " 'positively': 5399,\n",
       " '270': 34705,\n",
       " 'ahmed': 34720,\n",
       " 'zatoichi': 9836,\n",
       " 'bannister': 13886,\n",
       " 'anniversaries': 52127,\n",
       " \"helm's\": 30580,\n",
       " \"'work'\": 52128,\n",
       " 'exclaimed': 34721,\n",
       " \"'unfunny'\": 52129,\n",
       " '274': 52029,\n",
       " 'feeling': 544,\n",
       " \"wanda's\": 52131,\n",
       " 'dolan': 33266,\n",
       " '278': 52133,\n",
       " 'peacoat': 52134,\n",
       " 'brawny': 40870,\n",
       " 'mishra': 40871,\n",
       " 'worlders': 40872,\n",
       " 'protags': 52135,\n",
       " 'skullcap': 52136,\n",
       " 'dastagir': 57596,\n",
       " 'affairs': 5622,\n",
       " 'wholesome': 7799,\n",
       " 'hymen': 52137,\n",
       " 'paramedics': 25246,\n",
       " 'unpersons': 52138,\n",
       " 'heavyarms': 52139,\n",
       " 'affaire': 52140,\n",
       " 'coulisses': 52141,\n",
       " 'hymer': 40873,\n",
       " 'kremlin': 52142,\n",
       " 'shipments': 30581,\n",
       " 'pixilated': 52143,\n",
       " \"'00s\": 30582,\n",
       " 'diminishing': 18512,\n",
       " 'cinematic': 1357,\n",
       " 'resonates': 14898,\n",
       " 'simplify': 40874,\n",
       " \"nature'\": 40875,\n",
       " 'temptresses': 40876,\n",
       " 'reverence': 16822,\n",
       " 'resonated': 19502,\n",
       " 'dailey': 34722,\n",
       " '2\\x85': 52144,\n",
       " 'treize': 27641,\n",
       " 'majo': 52145,\n",
       " 'kiya': 21910,\n",
       " 'woolnough': 52146,\n",
       " 'thanatos': 39797,\n",
       " 'sandoval': 35731,\n",
       " 'dorama': 40879,\n",
       " \"o'shaughnessy\": 52147,\n",
       " 'tech': 4988,\n",
       " 'fugitives': 32018,\n",
       " 'teck': 30583,\n",
       " \"'e'\": 76125,\n",
       " 'doesn’t': 40881,\n",
       " 'purged': 52149,\n",
       " 'saying': 657,\n",
       " \"martians'\": 41095,\n",
       " 'norliss': 23418,\n",
       " 'dickey': 27642,\n",
       " 'dicker': 52152,\n",
       " \"'sependipity\": 52153,\n",
       " 'padded': 8422,\n",
       " 'ordell': 57792,\n",
       " \"sturges'\": 40882,\n",
       " 'independentcritics': 52154,\n",
       " 'tempted': 5745,\n",
       " \"atkinson's\": 34724,\n",
       " 'hounded': 25247,\n",
       " 'apace': 52155,\n",
       " 'clicked': 15494,\n",
       " \"'humor'\": 30584,\n",
       " \"martino's\": 17177,\n",
       " \"'supporting\": 52156,\n",
       " 'warmongering': 52032,\n",
       " \"zemeckis's\": 34725,\n",
       " 'lube': 21911,\n",
       " 'shocky': 52157,\n",
       " 'plate': 7476,\n",
       " 'plata': 40883,\n",
       " 'sturgess': 40884,\n",
       " \"nerds'\": 40885,\n",
       " 'plato': 20600,\n",
       " 'plath': 34726,\n",
       " 'platt': 40886,\n",
       " 'mcnab': 52159,\n",
       " 'clumsiness': 27643,\n",
       " 'altogether': 3899,\n",
       " 'massacring': 42584,\n",
       " 'bicenntinial': 52160,\n",
       " 'skaal': 40887,\n",
       " 'droning': 14360,\n",
       " 'lds': 8776,\n",
       " 'jaguar': 21912,\n",
       " \"cale's\": 34727,\n",
       " 'nicely': 1777,\n",
       " 'mummy': 4588,\n",
       " \"lot's\": 18513,\n",
       " 'patch': 10086,\n",
       " 'kerkhof': 50202,\n",
       " \"leader's\": 52161,\n",
       " \"'movie\": 27644,\n",
       " 'uncomfirmed': 52162,\n",
       " 'heirloom': 40888,\n",
       " 'wrangle': 47360,\n",
       " 'emotion\\x85': 52163,\n",
       " \"'stargate'\": 52164,\n",
       " 'pinoy': 40889,\n",
       " 'conchatta': 40890,\n",
       " 'broeke': 41128,\n",
       " 'advisedly': 40891,\n",
       " \"barker's\": 17636,\n",
       " 'descours': 52166,\n",
       " 'lots': 772,\n",
       " 'lotr': 9259,\n",
       " 'irs': 9879,\n",
       " 'lott': 52167,\n",
       " 'xvi': 40892,\n",
       " 'irk': 34728,\n",
       " 'irl': 52168,\n",
       " 'ira': 6887,\n",
       " 'belzer': 21913,\n",
       " 'irc': 52169,\n",
       " 'ire': 27645,\n",
       " 'requisites': 40893,\n",
       " 'discipline': 7693,\n",
       " 'lyoko': 52961,\n",
       " 'extend': 11310,\n",
       " 'nature': 873,\n",
       " \"'dickie'\": 52170,\n",
       " 'optimist': 40894,\n",
       " 'lapping': 30586,\n",
       " 'superficial': 3900,\n",
       " 'vestment': 52171,\n",
       " 'extent': 2823,\n",
       " 'tendons': 52172,\n",
       " \"heller's\": 52173,\n",
       " 'quagmires': 52174,\n",
       " 'miyako': 52175,\n",
       " 'moocow': 20601,\n",
       " \"coles'\": 52176,\n",
       " 'lookit': 40895,\n",
       " 'ravenously': 52177,\n",
       " 'levitating': 40896,\n",
       " 'perfunctorily': 52178,\n",
       " 'lookin': 30587,\n",
       " \"lot'\": 40898,\n",
       " 'lookie': 52179,\n",
       " 'fearlessly': 34870,\n",
       " 'libyan': 52181,\n",
       " 'fondles': 40899,\n",
       " 'gopher': 35714,\n",
       " 'wearying': 40901,\n",
       " \"nz's\": 52182,\n",
       " 'minuses': 27646,\n",
       " 'puposelessly': 52183,\n",
       " 'shandling': 52184,\n",
       " 'decapitates': 31268,\n",
       " 'humming': 11929,\n",
       " \"'nother\": 40902,\n",
       " 'smackdown': 21914,\n",
       " 'underdone': 30588,\n",
       " 'frf': 40903,\n",
       " 'triviality': 52185,\n",
       " 'fro': 25248,\n",
       " 'bothers': 8777,\n",
       " \"'kensington\": 52186,\n",
       " 'much': 73,\n",
       " 'muco': 34730,\n",
       " 'wiseguy': 22615,\n",
       " \"richie's\": 27648,\n",
       " 'tonino': 40904,\n",
       " 'unleavened': 52187,\n",
       " 'fry': 11587,\n",
       " \"'tv'\": 40905,\n",
       " 'toning': 40906,\n",
       " 'obese': 14361,\n",
       " 'sensationalized': 30589,\n",
       " 'spiv': 40907,\n",
       " 'spit': 6259,\n",
       " 'arkin': 7364,\n",
       " 'charleton': 21915,\n",
       " 'jeon': 16823,\n",
       " 'boardroom': 21916,\n",
       " 'doubts': 4989,\n",
       " 'spin': 3084,\n",
       " 'hepo': 53083,\n",
       " 'wildcat': 27649,\n",
       " 'venoms': 10584,\n",
       " 'misconstrues': 52191,\n",
       " 'mesmerising': 18514,\n",
       " 'misconstrued': 40908,\n",
       " 'rescinds': 52192,\n",
       " 'prostrate': 52193,\n",
       " 'majid': 40909,\n",
       " 'climbed': 16479,\n",
       " 'canoeing': 34731,\n",
       " 'majin': 52195,\n",
       " 'animie': 57804,\n",
       " 'sylke': 40910,\n",
       " 'conditioned': 14899,\n",
       " 'waddell': 40911,\n",
       " '3\\x85': 52196,\n",
       " 'hyperdrive': 41188,\n",
       " 'conditioner': 34732,\n",
       " 'bricklayer': 53153,\n",
       " 'hong': 2576,\n",
       " 'memoriam': 52198,\n",
       " 'inventively': 30592,\n",
       " \"levant's\": 25249,\n",
       " 'portobello': 20638,\n",
       " 'remand': 52200,\n",
       " 'mummified': 19504,\n",
       " 'honk': 27650,\n",
       " 'spews': 19505,\n",
       " 'visitations': 40912,\n",
       " 'mummifies': 52201,\n",
       " 'cavanaugh': 25250,\n",
       " 'zeon': 23385,\n",
       " \"jungle's\": 40913,\n",
       " 'viertel': 34733,\n",
       " 'frenchmen': 27651,\n",
       " 'torpedoes': 52202,\n",
       " 'schlessinger': 52203,\n",
       " 'torpedoed': 34734,\n",
       " 'blister': 69876,\n",
       " 'cinefest': 52204,\n",
       " 'furlough': 34735,\n",
       " 'mainsequence': 52205,\n",
       " 'mentors': 40914,\n",
       " 'academic': 9094,\n",
       " 'stillness': 20602,\n",
       " 'academia': 40915,\n",
       " 'lonelier': 52206,\n",
       " 'nibby': 52207,\n",
       " \"losers'\": 52208,\n",
       " 'cineastes': 40916,\n",
       " 'corporate': 4449,\n",
       " 'massaging': 40917,\n",
       " 'bellow': 30593,\n",
       " 'absurdities': 19506,\n",
       " 'expetations': 53241,\n",
       " 'nyfiken': 40918,\n",
       " 'mehras': 75638,\n",
       " 'lasse': 52209,\n",
       " 'visability': 52210,\n",
       " 'militarily': 33946,\n",
       " \"elder'\": 52211,\n",
       " 'gainsbourg': 19023,\n",
       " 'hah': 20603,\n",
       " 'hai': 13420,\n",
       " 'haj': 34736,\n",
       " 'hak': 25251,\n",
       " 'hal': 4311,\n",
       " 'ham': 4892,\n",
       " 'duffer': 53259,\n",
       " 'haa': 52213,\n",
       " 'had': 66,\n",
       " 'advancement': 11930,\n",
       " 'hag': 16825,\n",
       " \"hand'\": 25252,\n",
       " 'hay': 13421,\n",
       " 'mcnamara': 20604,\n",
       " \"mozart's\": 52214,\n",
       " 'duffel': 30731,\n",
       " 'haq': 30594,\n",
       " 'har': 13887,\n",
       " 'has': 44,\n",
       " 'hat': 2401,\n",
       " 'hav': 40919,\n",
       " 'haw': 30595,\n",
       " 'figtings': 52215,\n",
       " 'elders': 15495,\n",
       " 'underpanted': 52216,\n",
       " 'pninson': 52217,\n",
       " 'unequivocally': 27652,\n",
       " \"barbara's\": 23673,\n",
       " \"bello'\": 52219,\n",
       " 'indicative': 12997,\n",
       " 'yawnfest': 40920,\n",
       " 'hexploitation': 52220,\n",
       " \"loder's\": 52221,\n",
       " 'sleuthing': 27653,\n",
       " \"justin's\": 32622,\n",
       " \"'ball\": 52222,\n",
       " \"'summer\": 52223,\n",
       " \"'demons'\": 34935,\n",
       " \"mormon's\": 52225,\n",
       " \"laughton's\": 34737,\n",
       " 'debell': 52226,\n",
       " 'shipyard': 39724,\n",
       " 'unabashedly': 30597,\n",
       " 'disks': 40401,\n",
       " 'crowd': 2290,\n",
       " 'crowe': 10087,\n",
       " \"vancouver's\": 56434,\n",
       " 'mosques': 34738,\n",
       " 'crown': 6627,\n",
       " 'culpas': 52227,\n",
       " 'crows': 27654,\n",
       " 'surrell': 53344,\n",
       " 'flowless': 52229,\n",
       " 'sheirk': 52230,\n",
       " \"'three\": 40923,\n",
       " \"peterson'\": 52231,\n",
       " 'ooverall': 52232,\n",
       " 'perchance': 40924,\n",
       " 'bottom': 1321,\n",
       " 'chabert': 53363,\n",
       " 'sneha': 52233,\n",
       " 'inhuman': 13888,\n",
       " 'ichii': 52234,\n",
       " 'ursla': 52235,\n",
       " 'completly': 30598,\n",
       " 'moviedom': 40925,\n",
       " 'raddick': 52236,\n",
       " 'brundage': 51995,\n",
       " 'brigades': 40926,\n",
       " 'starring': 1181,\n",
       " \"'goal'\": 52237,\n",
       " 'caskets': 52238,\n",
       " 'willcock': 52239,\n",
       " \"threesome's\": 52240,\n",
       " \"mosque'\": 52241,\n",
       " \"cover's\": 52242,\n",
       " 'spaceships': 17637,\n",
       " 'anomalous': 40927,\n",
       " 'ptsd': 27655,\n",
       " 'shirdan': 52243,\n",
       " 'obscenity': 21962,\n",
       " 'lemmings': 30599,\n",
       " 'duccio': 30600,\n",
       " \"levene's\": 52244,\n",
       " \"'gorby'\": 52245,\n",
       " \"teenager's\": 25255,\n",
       " 'marshall': 5340,\n",
       " 'honeymoon': 9095,\n",
       " 'shoots': 3231,\n",
       " 'despised': 12258,\n",
       " 'okabasho': 52246,\n",
       " 'fabric': 8289,\n",
       " 'cannavale': 18515,\n",
       " 'raped': 3537,\n",
       " \"tutt's\": 52247,\n",
       " 'grasping': 17638,\n",
       " 'despises': 18516,\n",
       " \"thief's\": 40928,\n",
       " 'rapes': 8926,\n",
       " 'raper': 52248,\n",
       " \"eyre'\": 27656,\n",
       " 'walchek': 52249,\n",
       " \"elmo's\": 23386,\n",
       " 'perfumes': 40929,\n",
       " 'spurting': 21918,\n",
       " \"exposition'\\x85\": 52250,\n",
       " 'denoting': 52251,\n",
       " 'thesaurus': 34740,\n",
       " \"shoot'\": 40930,\n",
       " 'bonejack': 49759,\n",
       " 'simpsonian': 52253,\n",
       " 'hebetude': 30601,\n",
       " \"hallow's\": 34741,\n",
       " 'desperation\\x85': 52254,\n",
       " 'incinerator': 34742,\n",
       " 'congratulations': 10308,\n",
       " 'humbled': 52255,\n",
       " \"else's\": 5924,\n",
       " 'trelkovski': 40845,\n",
       " \"rape'\": 52256,\n",
       " \"'chapters'\": 59386,\n",
       " '1600s': 52257,\n",
       " 'martian': 7253,\n",
       " 'nicest': 25256,\n",
       " 'eyred': 52259,\n",
       " 'passenger': 9457,\n",
       " 'disgrace': 6041,\n",
       " 'moderne': 52260,\n",
       " 'barrymore': 5120,\n",
       " 'yankovich': 52261,\n",
       " 'moderns': 40931,\n",
       " 'studliest': 52262,\n",
       " 'bedsheet': 52263,\n",
       " 'decapitation': 14900,\n",
       " 'slurring': 52264,\n",
       " \"'nunsploitation'\": 52265,\n",
       " \"'character'\": 34743,\n",
       " 'cambodia': 9880,\n",
       " 'rebelious': 52266,\n",
       " 'pasadena': 27657,\n",
       " 'crowne': 40932,\n",
       " \"'bedchamber\": 52267,\n",
       " 'conjectural': 52268,\n",
       " 'appologize': 52269,\n",
       " 'halfassing': 52270,\n",
       " 'paycheque': 57816,\n",
       " 'palms': 20606,\n",
       " \"'islands\": 52271,\n",
       " 'hawked': 40933,\n",
       " 'palme': 21919,\n",
       " 'conservatively': 40934,\n",
       " 'larp': 64007,\n",
       " 'palma': 5558,\n",
       " 'smelling': 21920,\n",
       " 'aragorn': 12998,\n",
       " 'hawker': 52272,\n",
       " 'hawkes': 52273,\n",
       " 'explosions': 3975,\n",
       " 'loren': 8059,\n",
       " \"pyle's\": 52274,\n",
       " 'shootout': 6704,\n",
       " \"mike's\": 18517,\n",
       " \"driscoll's\": 52275,\n",
       " 'cogsworth': 40935,\n",
       " \"britian's\": 52276,\n",
       " 'childs': 34744,\n",
       " \"portrait's\": 52277,\n",
       " 'chain': 3626,\n",
       " 'whoever': 2497,\n",
       " 'puttered': 52278,\n",
       " 'childe': 52279,\n",
       " 'maywether': 52280,\n",
       " 'chair': 3036,\n",
       " \"rance's\": 52281,\n",
       " 'machu': 34745,\n",
       " 'ballet': 4517,\n",
       " 'grapples': 34746,\n",
       " 'summerize': 76152,\n",
       " 'freelance': 30603,\n",
       " \"andrea's\": 52283,\n",
       " '\\x91very': 52284,\n",
       " 'coolidge': 45879,\n",
       " 'mache': 18518,\n",
       " 'balled': 52285,\n",
       " 'grappled': 40937,\n",
       " 'macha': 18519,\n",
       " 'underlining': 21921,\n",
       " 'macho': 5623,\n",
       " 'oversight': 19507,\n",
       " 'machi': 25257,\n",
       " 'verbally': 11311,\n",
       " 'tenacious': 21922,\n",
       " 'windshields': 40938,\n",
       " 'paychecks': 18557,\n",
       " 'jerk': 3396,\n",
       " \"good'\": 11931,\n",
       " 'prancer': 34748,\n",
       " 'prances': 21923,\n",
       " 'olympus': 52286,\n",
       " 'lark': 21924,\n",
       " 'embark': 10785,\n",
       " 'gloomy': 7365,\n",
       " 'jehaan': 52287,\n",
       " 'turaqui': 52288,\n",
       " \"child'\": 20607,\n",
       " 'locked': 2894,\n",
       " 'pranced': 52289,\n",
       " 'exact': 2588,\n",
       " 'unattuned': 52290,\n",
       " 'minute': 783,\n",
       " 'skewed': 16118,\n",
       " 'hodgins': 40940,\n",
       " 'skewer': 34749,\n",
       " 'think\\x85': 52291,\n",
       " 'rosenstein': 38765,\n",
       " 'helmit': 52292,\n",
       " 'wrestlemanias': 34750,\n",
       " 'hindered': 16826,\n",
       " \"martha's\": 30604,\n",
       " 'cheree': 52293,\n",
       " \"pluckin'\": 52294,\n",
       " 'ogles': 40941,\n",
       " 'heavyweight': 11932,\n",
       " 'aada': 82190,\n",
       " 'chopping': 11312,\n",
       " 'strongboy': 61534,\n",
       " 'hegemonic': 41342,\n",
       " 'adorns': 40942,\n",
       " 'xxth': 41346,\n",
       " 'nobuhiro': 34751,\n",
       " 'capitães': 52298,\n",
       " 'kavogianni': 52299,\n",
       " 'antwerp': 13422,\n",
       " 'celebrated': 6538,\n",
       " 'roarke': 52300,\n",
       " 'baggins': 40943,\n",
       " 'cheeseburgers': 31270,\n",
       " 'matras': 52301,\n",
       " \"nineties'\": 52302,\n",
       " \"'craig'\": 52303,\n",
       " 'celebrates': 12999,\n",
       " 'unintentionally': 3383,\n",
       " 'drafted': 14362,\n",
       " 'climby': 52304,\n",
       " '303': 52305,\n",
       " 'oldies': 18520,\n",
       " 'climbs': 9096,\n",
       " 'honour': 9655,\n",
       " 'plucking': 34752,\n",
       " '305': 30074,\n",
       " 'address': 5514,\n",
       " 'menjou': 40944,\n",
       " \"'freak'\": 42592,\n",
       " 'dwindling': 19508,\n",
       " 'benson': 9458,\n",
       " 'white’s': 52307,\n",
       " 'shamelessness': 40945,\n",
       " 'impacted': 21925,\n",
       " 'upatz': 52308,\n",
       " 'cusack': 3840,\n",
       " \"flavia's\": 37567,\n",
       " 'effette': 52309,\n",
       " 'influx': 34753,\n",
       " 'boooooooo': 52310,\n",
       " 'dimitrova': 52311,\n",
       " 'houseman': 13423,\n",
       " 'bigas': 25259,\n",
       " 'boylen': 52312,\n",
       " 'phillipenes': 52313,\n",
       " 'fakery': 40946,\n",
       " \"grandpa's\": 27658,\n",
       " 'darnell': 27659,\n",
       " 'undergone': 19509,\n",
       " 'handbags': 52315,\n",
       " 'perished': 21926,\n",
       " 'pooped': 37778,\n",
       " 'vigour': 27660,\n",
       " 'opposed': 3627,\n",
       " 'etude': 52316,\n",
       " \"caine's\": 11799,\n",
       " 'doozers': 52317,\n",
       " 'photojournals': 34754,\n",
       " 'perishes': 52318,\n",
       " 'constrains': 34755,\n",
       " 'migenes': 40948,\n",
       " 'consoled': 30605,\n",
       " 'alastair': 16827,\n",
       " 'wvs': 52319,\n",
       " 'ooooooh': 52320,\n",
       " 'approving': 34756,\n",
       " 'consoles': 40949,\n",
       " 'disparagement': 52064,\n",
       " 'futureistic': 52322,\n",
       " 'rebounding': 52323,\n",
       " \"'date\": 52324,\n",
       " 'gregoire': 52325,\n",
       " 'rutherford': 21927,\n",
       " 'americanised': 34757,\n",
       " 'novikov': 82196,\n",
       " 'following': 1042,\n",
       " 'munroe': 34758,\n",
       " \"morita'\": 52326,\n",
       " 'christenssen': 52327,\n",
       " 'oatmeal': 23106,\n",
       " 'fossey': 25260,\n",
       " 'livered': 40950,\n",
       " 'listens': 13000,\n",
       " \"'marci\": 76164,\n",
       " \"otis's\": 52330,\n",
       " 'thanking': 23387,\n",
       " 'maude': 16019,\n",
       " 'extensions': 34759,\n",
       " 'ameteurish': 52332,\n",
       " \"commender's\": 52333,\n",
       " 'agricultural': 27661,\n",
       " 'convincingly': 4518,\n",
       " 'fueled': 17639,\n",
       " 'mahattan': 54014,\n",
       " \"paris's\": 40952,\n",
       " 'vulkan': 52336,\n",
       " 'stapes': 52337,\n",
       " 'odysessy': 52338,\n",
       " 'harmon': 12259,\n",
       " 'surfing': 4252,\n",
       " 'halloran': 23494,\n",
       " 'unbelieveably': 49580,\n",
       " \"'offed'\": 52339,\n",
       " 'quadrant': 30607,\n",
       " 'inhabiting': 19510,\n",
       " 'nebbish': 34760,\n",
       " 'forebears': 40953,\n",
       " 'skirmish': 34761,\n",
       " 'ocassionally': 52340,\n",
       " \"'resist\": 52341,\n",
       " 'impactful': 21928,\n",
       " 'spicier': 52342,\n",
       " 'touristy': 40954,\n",
       " \"'football'\": 52343,\n",
       " 'webpage': 40955,\n",
       " 'exurbia': 52345,\n",
       " 'jucier': 52346,\n",
       " 'professors': 14901,\n",
       " 'structuring': 34762,\n",
       " 'jig': 30608,\n",
       " 'overlord': 40956,\n",
       " 'disconnect': 25261,\n",
       " 'sniffle': 82201,\n",
       " 'slimeball': 40957,\n",
       " 'jia': 40958,\n",
       " 'milked': 16828,\n",
       " 'banjoes': 40959,\n",
       " 'jim': 1237,\n",
       " 'workforces': 52348,\n",
       " 'jip': 52349,\n",
       " 'rotweiller': 52350,\n",
       " 'mundaneness': 34763,\n",
       " \"'ninja'\": 52351,\n",
       " \"dead'\": 11040,\n",
       " \"cipriani's\": 40960,\n",
       " 'modestly': 20608,\n",
       " \"professor'\": 52352,\n",
       " 'shacked': 40961,\n",
       " 'bashful': 34764,\n",
       " 'sorter': 23388,\n",
       " 'overpowering': 16120,\n",
       " 'workmanlike': 18521,\n",
       " 'henpecked': 27662,\n",
       " 'sorted': 18522,\n",
       " \"jōb's\": 52354,\n",
       " \"'always\": 52355,\n",
       " \"'baptists\": 34765,\n",
       " 'dreamcatchers': 52356,\n",
       " \"'silence'\": 52357,\n",
       " 'hickory': 21929,\n",
       " 'fun\\x97yet': 52358,\n",
       " 'breakumentary': 52359,\n",
       " 'didn': 15496,\n",
       " 'didi': 52360,\n",
       " 'pealing': 52361,\n",
       " 'dispite': 40962,\n",
       " \"italy's\": 25262,\n",
       " 'instability': 21930,\n",
       " 'quarter': 6539,\n",
       " 'quartet': 12608,\n",
       " 'padmé': 52362,\n",
       " \"'bleedmedry\": 52363,\n",
       " 'pahalniuk': 52364,\n",
       " 'honduras': 52365,\n",
       " 'bursting': 10786,\n",
       " \"pablo's\": 41465,\n",
       " 'irremediably': 52367,\n",
       " 'presages': 40963,\n",
       " 'bowlegged': 57832,\n",
       " 'dalip': 65183,\n",
       " 'entering': 6260,\n",
       " 'newsradio': 76172,\n",
       " 'presaged': 54150,\n",
       " \"giallo's\": 27663,\n",
       " 'bouyant': 40964,\n",
       " 'amerterish': 52368,\n",
       " 'rajni': 18523,\n",
       " 'leeves': 30610,\n",
       " 'macauley': 34767,\n",
       " 'seriously': 612,\n",
       " 'sugercoma': 52369,\n",
       " 'grimstead': 52370,\n",
       " \"'fairy'\": 52371,\n",
       " 'zenda': 30611,\n",
       " \"'twins'\": 52372,\n",
       " 'realisation': 17640,\n",
       " 'highsmith': 27664,\n",
       " 'raunchy': 7817,\n",
       " 'incentives': 40965,\n",
       " 'flatson': 52374,\n",
       " 'snooker': 35097,\n",
       " 'crazies': 16829,\n",
       " 'crazier': 14902,\n",
       " 'grandma': 7094,\n",
       " 'napunsaktha': 52375,\n",
       " 'workmanship': 30612,\n",
       " 'reisner': 52376,\n",
       " \"sanford's\": 61306,\n",
       " '\\x91doña': 52377,\n",
       " 'modest': 6108,\n",
       " \"everything's\": 19153,\n",
       " 'hamer': 40966,\n",
       " \"couldn't'\": 52379,\n",
       " 'quibble': 13001,\n",
       " 'socking': 52380,\n",
       " 'tingler': 21931,\n",
       " 'gutman': 52381,\n",
       " 'lachlan': 40967,\n",
       " 'tableaus': 52382,\n",
       " 'headbanger': 52383,\n",
       " 'spoken': 2847,\n",
       " 'cerebrally': 34768,\n",
       " \"'road\": 23490,\n",
       " 'tableaux': 21932,\n",
       " \"proust's\": 40968,\n",
       " 'periodical': 40969,\n",
       " \"shoveller's\": 52385,\n",
       " 'tamara': 25263,\n",
       " 'affords': 17641,\n",
       " 'concert': 3249,\n",
       " \"yara's\": 87955,\n",
       " 'someome': 52386,\n",
       " 'lingering': 8424,\n",
       " \"abraham's\": 41511,\n",
       " 'beesley': 34769,\n",
       " 'cherbourg': 34770,\n",
       " 'kagan': 28624,\n",
       " 'snatch': 9097,\n",
       " \"miyazaki's\": 9260,\n",
       " 'absorbs': 25264,\n",
       " \"koltai's\": 40970,\n",
       " 'tingled': 64027,\n",
       " 'crossroads': 19511,\n",
       " 'rehab': 16121,\n",
       " 'falworth': 52389,\n",
       " 'sequals': 52390,\n",
       " ...}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "word_index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "c0c6bfb4-3c5b-4ed5-923d-9abdd801979d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "fawn 34701\n"
     ]
    }
   ],
   "source": [
    "# 遍历查看\n",
    "for key,val in word_index.items():\n",
    "    print(key,val)\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "7b183847-f47f-4eea-ae59-22b5555778ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 交换key和val\n",
    "inverted_word_index = dict(\n",
    "    (i + index_from, word) for (word, i) in word_index.items()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "51e76da1-5741-4680-8314-0ab0cecee281",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"harpo's\""
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "inverted_word_index[40834]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "e88118a7-fad2-41fe-96b2-03abd7d6ec3b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 解析第一个句子\n",
    "inverted_word_index[start_char] = \"[START]\"\n",
    "inverted_word_index[oov_char] = \"[OOV]\"\n",
    "\n",
    "\n",
    "decoded_sequence = \" \".join(inverted_word_index[i] for i in x_train[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "7c1822f4-7038-48be-abe6-809424dd762d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"[START] this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert [OOV] is an amazing actor and now the same being director [OOV] father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for [OOV] and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also [OOV] to the two little boy's that played the [OOV] of norman and paul they were just brilliant children are often left out of the [OOV] list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the whole story was so lovely because it was true and was someone's life after all that was shared with us all\""
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "decoded_sequence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "fa3361ce-cf20-4fdb-92f8-0052f77997d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 统计长度\n",
    "len_list = []\n",
    "for i in x_train:\n",
    "    len_list.append(len(i))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "df75e4b5-9fdf-4c6a-ba31-0abdca339b42",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "d18d11dc-f541-40b0-abb2-01c81f419d03",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "238.71364"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.array(len_list).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "3515e2bf-cc0e-4b92-8a64-798987dd323d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 补白\n",
    "from tensorflow.keras.preprocessing.sequence import pad_sequences"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "3ae7dd2a-41dc-4ad8-96fc-36b0372eb78e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 最大长度\n",
    "word_num = 250\n",
    "x_train = pad_sequences(x_train, maxlen=word_num)\n",
    "x_test = pad_sequences(x_test, maxlen=word_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "6a42643c-1924-45b9-b782-fd5b010458b8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((25000, 250), (25000, 250))"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_train.shape,x_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "79674c7e-ad88-44d9-85ee-ac7dae59ecd0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 搭建模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "7b1bfcc4-ff21-44fb-b60d-971f1bf90044",
   "metadata": {},
   "outputs": [],
   "source": [
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.layers import Dense, Embedding,GRU,Activation\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "5d8dc9fc-71d6-4cd4-a099-7b66d07fd6ff",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(25000, 250)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "467dbe36-cf32-40f9-a232-8131f3c79dc9",
   "metadata": {},
   "outputs": [],
   "source": [
    "embed_dim = 32\n",
    "state_dim = 32"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "49e4360b-8e02-4bbc-959f-4bc733f3948e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 定义Many-to-one模型\n",
    "rnn = Sequential([\n",
    "    # 输入大小\n",
    "    Embedding(input_dim=vocabulary,output_dim=embed_dim,input_length=word_num),\n",
    "    # 只需要最后一个输出向量\n",
    "    GRU(state_dim, return_sequences=False),\n",
    "    Dense(1,activation='sigmoid')\n",
    "])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "96c60651-e30e-4eef-bedf-3b633303e2b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ! pip install numpy==1.19.5\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "128984b7-cf78-4dc8-8e24-5b1bfbb7ceb1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: \"sequential\"\n",
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "embedding (Embedding)        (None, 250, 32)           320000    \n",
      "_________________________________________________________________\n",
      "gru (GRU)                    (None, 32)                6336      \n",
      "_________________________________________________________________\n",
      "dense (Dense)                (None, 1)                 33        \n",
      "=================================================================\n",
      "Total params: 326,369\n",
      "Trainable params: 326,369\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "rnn.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "01867809-6afa-498c-8c13-c4d7523241d2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6208"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 参数数量\n",
    "64*(64+32)+64"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "534c196f-bed0-46fa-90c4-ac11d89c6497",
   "metadata": {},
   "outputs": [],
   "source": [
    "rnn.compile(loss=\"binary_crossentropy\",optimizer=\"rmsprop\",metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "be83818a-d4c2-41ff-8f48-8147b5ee0b52",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "cdf17d58-c20c-4617-b67b-841b7ed5d287",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/10\n",
      "196/196 [==============================] - 4s 20ms/step - loss: 0.5145 - accuracy: 0.7417 - val_loss: 0.4155 - val_accuracy: 0.8058\n",
      "Epoch 2/10\n",
      "196/196 [==============================] - 3s 17ms/step - loss: 0.3100 - accuracy: 0.8730 - val_loss: 0.3148 - val_accuracy: 0.8702\n",
      "Epoch 3/10\n",
      "196/196 [==============================] - 3s 17ms/step - loss: 0.2524 - accuracy: 0.9003 - val_loss: 0.3030 - val_accuracy: 0.8740\n",
      "Epoch 4/10\n",
      "196/196 [==============================] - 3s 17ms/step - loss: 0.2169 - accuracy: 0.9156 - val_loss: 0.3538 - val_accuracy: 0.8462\n",
      "Epoch 5/10\n",
      "196/196 [==============================] - 3s 17ms/step - loss: 0.1924 - accuracy: 0.9286 - val_loss: 0.3590 - val_accuracy: 0.8674\n",
      "Epoch 6/10\n",
      "196/196 [==============================] - 3s 17ms/step - loss: 0.1751 - accuracy: 0.9350 - val_loss: 0.4251 - val_accuracy: 0.8382\n",
      "Epoch 7/10\n",
      "196/196 [==============================] - 3s 17ms/step - loss: 0.1600 - accuracy: 0.9412 - val_loss: 0.3795 - val_accuracy: 0.8526\n",
      "Epoch 8/10\n",
      "196/196 [==============================] - 3s 17ms/step - loss: 0.1487 - accuracy: 0.9455 - val_loss: 0.4026 - val_accuracy: 0.8521\n",
      "Epoch 9/10\n",
      "196/196 [==============================] - 3s 17ms/step - loss: 0.1371 - accuracy: 0.9502 - val_loss: 0.3568 - val_accuracy: 0.8539\n",
      "Epoch 10/10\n",
      "196/196 [==============================] - 3s 17ms/step - loss: 0.1307 - accuracy: 0.9526 - val_loss: 0.4305 - val_accuracy: 0.8438\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<tensorflow.python.keras.callbacks.History at 0x210208f1d30>"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rnn.fit(x_train, y_train,\n",
    "          batch_size=128,\n",
    "          epochs=10,\n",
    "          validation_data=(x_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "095ffcd3-f5bb-4d29-9a09-1855a8c40cc7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "196/196 [==============================] - 1s 5ms/step - loss: 0.4305 - accuracy: 0.8438\n"
     ]
    }
   ],
   "source": [
    "score, acc = rnn.evaluate(x_test, y_test, batch_size=128)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4af105bc-c921-4e23-a52c-287c452454a6",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "01d47f78-d23b-45db-a670-16e8337923fc",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "f9d6b8e6-d362-4b32-947a-3fa6d779b320",
   "metadata": {},
   "outputs": [],
   "source": [
    "from tensorflow.keras.layers import Flatten"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "e147a10a-c5bc-4edb-9843-bfdce1a961b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 定义Many-to-many模型\n",
    "rnn_new = Sequential([\n",
    "    # Flatten大小\n",
    "    Embedding(input_dim=vocabulary,output_dim=embed_dim,input_length=word_num),\n",
    "    # 输出sequence（矩阵）\n",
    "    GRU(state_dim, return_sequences=True),\n",
    "    # 拉直\n",
    "    Flatten(),\n",
    "    Dense(1,activation='sigmoid')\n",
    "])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "a9773cf6-003d-4d32-84f5-04f14a0afd9c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: \"sequential_1\"\n",
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "embedding_2 (Embedding)      (None, 250, 32)           320000    \n",
      "_________________________________________________________________\n",
      "gru_2 (GRU)                  (None, 250, 32)           6336      \n",
      "_________________________________________________________________\n",
      "flatten (Flatten)            (None, 8000)              0         \n",
      "_________________________________________________________________\n",
      "dense_1 (Dense)              (None, 1)                 8001      \n",
      "=================================================================\n",
      "Total params: 334,337\n",
      "Trainable params: 334,337\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "rnn_new.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "539a6d41-60e0-42f1-bfec-6e4629f571fe",
   "metadata": {},
   "outputs": [],
   "source": [
    "rnn_new.compile(loss=\"binary_crossentropy\",optimizer=\"rmsprop\",metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "a05444a5-63d2-4f67-8285-9ee0262ea73a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/10\n",
      "196/196 [==============================] - 4s 20ms/step - loss: 0.4260 - accuracy: 0.7859 - val_loss: 0.2855 - val_accuracy: 0.8811\n",
      "Epoch 2/10\n",
      "196/196 [==============================] - 3s 17ms/step - loss: 0.2482 - accuracy: 0.9005 - val_loss: 0.3074 - val_accuracy: 0.8699\n",
      "Epoch 3/10\n",
      "196/196 [==============================] - 3s 17ms/step - loss: 0.2026 - accuracy: 0.9205 - val_loss: 0.4182 - val_accuracy: 0.8344\n",
      "Epoch 4/10\n",
      "196/196 [==============================] - 3s 17ms/step - loss: 0.1805 - accuracy: 0.9282 - val_loss: 0.3198 - val_accuracy: 0.8750\n",
      "Epoch 5/10\n",
      "196/196 [==============================] - 3s 17ms/step - loss: 0.1565 - accuracy: 0.9402 - val_loss: 0.3593 - val_accuracy: 0.8631\n",
      "Epoch 6/10\n",
      "196/196 [==============================] - 3s 17ms/step - loss: 0.1387 - accuracy: 0.9490 - val_loss: 0.4957 - val_accuracy: 0.8244\n",
      "Epoch 7/10\n",
      "196/196 [==============================] - 3s 17ms/step - loss: 0.1182 - accuracy: 0.9576 - val_loss: 0.4074 - val_accuracy: 0.8577\n",
      "Epoch 8/10\n",
      "196/196 [==============================] - 3s 17ms/step - loss: 0.0982 - accuracy: 0.9653 - val_loss: 0.4127 - val_accuracy: 0.8598\n",
      "Epoch 9/10\n",
      "196/196 [==============================] - 3s 17ms/step - loss: 0.0801 - accuracy: 0.9726 - val_loss: 0.4514 - val_accuracy: 0.8580\n",
      "Epoch 10/10\n",
      "196/196 [==============================] - 3s 17ms/step - loss: 0.0639 - accuracy: 0.9790 - val_loss: 0.5269 - val_accuracy: 0.8572\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<tensorflow.python.keras.callbacks.History at 0x210210e5670>"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rnn_new.fit(x_train, y_train,\n",
    "          batch_size=128,\n",
    "          epochs=10,\n",
    "          validation_data=(x_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "674fe9d8-d010-493d-beba-63c2f9ec22ce",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4b82f96b-4fd9-4b9c-a077-ea9a787e0cfd",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
